"""Smoke test: load Llama-2-7B in bf16 on GPU 0 and run a single forward pass."""
import os

# Restrict the process to GPU 0. This must be set BEFORE torch initializes
# CUDA (i.e. before importing torch/transformers triggers any CUDA call),
# otherwise the variable is silently ignored.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = '/share/huggingface/Llama-2-7b-ms'

# Tokenizers are CPU-only; `device_map` is a model-loading argument and does
# not apply to AutoTokenizer, so it is not passed here.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='auto',            # place/shard weights automatically on visible GPUs
    torch_dtype=torch.bfloat16,   # half the memory of fp32, same exponent range
)
model.eval()  # disable dropout etc. — we only run inference

# Move inputs to wherever device_map placed the model's first layer instead
# of hard-coding 'cuda'; `inputs` is a BatchEncoding (input_ids + attention_mask).
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt").to(model.device)

# A plain forward pass needs no autograd graph; skipping it saves substantial
# activation memory on a 7B-parameter model.
with torch.no_grad():
    outputs = model(**inputs)

# Surface something so the smoke test visibly succeeded.
print(outputs.logits.shape)